{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## **作业：实现通用的批量梯度下降算法模块**\n",
    "根据函数$y = 3 {x_1}^2 - 5 x_1 x_2 + 2 {x_2}^2 - 8 x_1 - 10 x_2 + 6$生成若干原始数据点,  \n",
    "要求使用梯度下降算法拟合出函数：$h(w, x) = w_5 {x_1}^2 + w_4 x_1 x_2 + w_3 {x_2}^2 + w_2 x_1 + w_1 x_2 + w_0$的系数$w$，  \n",
    "使得该函数与上述原始数据点具有最小二乘解。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### **任务1：了解待拟合的数据源**\n",
    "下面的代码生成了待拟合的数据源，变量x1和x2形成网格数据点，共36个"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "#1, (-3.00,-3.00),60.63\n",
      "#2, (-3.00,-1.80),55.18\n",
      "#3, (-3.00,-0.60),54.84\n",
      "#4, (-3.00,0.60),61.03\n",
      "#5, (-3.00,1.80),73.38\n",
      "#6, (-3.00,3.00),90.36\n",
      "#7, (-1.80,-3.00),52.08\n",
      "#8, (-1.80,-1.80),38.79\n",
      "#9, (-1.80,-0.60),32.12\n",
      "#10, (-1.80,0.60),30.39\n",
      "#11, (-1.80,1.80),34.84\n",
      "#12, (-1.80,3.00),45.79\n",
      "#13, (-0.60,-3.00),51.61\n",
      "#14, (-0.60,-1.80),30.98\n",
      "#15, (-0.60,-0.60),17.28\n",
      "#16, (-0.60,0.60),9.28\n",
      "#17, (-0.60,1.80),6.12\n",
      "#18, (-0.60,3.00),9.84\n",
      "#19, (0.60,-3.00),59.31\n",
      "#20, (0.60,-1.80),32.54\n",
      "#21, (0.60,-0.60),11.74\n",
      "#22, (0.60,0.60),-4.38\n",
      "#23, (0.60,1.80),-14.35\n",
      "#24, (0.60,3.00),-18.16\n",
      "#25, (1.80,-3.00),76.80\n",
      "#26, (1.80,-1.80),42.52\n",
      "#27, (1.80,-0.60),14.12\n",
      "#28, (1.80,0.60),-9.20\n",
      "#29, (1.80,1.80),-26.23\n",
      "#30, (1.80,3.00),-37.45\n",
      "#31, (3.00,-3.00),102.78\n",
      "#32, (3.00,-1.80),60.69\n",
      "#33, (3.00,-0.60),25.42\n",
      "#34, (3.00,0.60),-5.23\n",
      "#35, (3.00,1.80),-29.01\n",
      "#36, (3.00,3.00),-47.05\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "# 用于生成样本数据点的函数 \n",
    "def f(x1, x2):\n",
    "    # np.random.random()是为了给样本数据一个小的扰动\n",
    "    return 3 * x1 * x1 - 5 * x1 * x2 + 2 * x2 * x2 - 8 * x1 - 10 * x2 + 6 + np.random.random()\n",
    "\n",
    "# 给定一批原始数据点的x1和x2两个方向坐标\n",
    "x1 = np.linspace(-3, 3, 6)\n",
    "x2 = np.linspace(-3, 3, 6)\n",
    "y = np.zeros(len(x1) * len(x2))\n",
    "# 根据x1和x2中的每个组合，分别计算对应的y值\n",
    "y_index = 0\n",
    "for i in np.arange(len(x1)):\n",
    "    for j in np.arange(len(x2)):\n",
    "        y[y_index] = f(x1[i], x2[j])\n",
    "        y_index += 1\n",
    "        # 打印每个数据点的坐标（x1, x2, y)\n",
    "        print(\"#{0}, ({1:.2f},{2:.2f}),{3:.2f}\".format(y_index, x1[i], x2[j], y[y_index-1]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### **任务2：了解扩展的自变量维度矩阵**\n",
    "下面的代码将任务1中36组数据点$(x_1,x_2) \\to y$扩展成：$({x_1}^2, x_1 x_2, {x_2}^2, x_1, x_2, 1) \\to y$的形式  \n",
    "维度矩阵包含6列的数据："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "X维度矩阵：\n",
      "[[ 9.    9.    9.   -3.   -3.    1.  ]\n",
      " [ 9.    5.4   3.24 -3.   -1.8   1.  ]\n",
      " [ 9.    1.8   0.36 -3.   -0.6   1.  ]\n",
      " [ 9.   -1.8   0.36 -3.    0.6   1.  ]\n",
      " [ 9.   -5.4   3.24 -3.    1.8   1.  ]\n",
      " [ 9.   -9.    9.   -3.    3.    1.  ]\n",
      " [ 3.24  5.4   9.   -1.8  -3.    1.  ]\n",
      " [ 3.24  3.24  3.24 -1.8  -1.8   1.  ]\n",
      " [ 3.24  1.08  0.36 -1.8  -0.6   1.  ]\n",
      " [ 3.24 -1.08  0.36 -1.8   0.6   1.  ]\n",
      " [ 3.24 -3.24  3.24 -1.8   1.8   1.  ]\n",
      " [ 3.24 -5.4   9.   -1.8   3.    1.  ]\n",
      " [ 0.36  1.8   9.   -0.6  -3.    1.  ]\n",
      " [ 0.36  1.08  3.24 -0.6  -1.8   1.  ]\n",
      " [ 0.36  0.36  0.36 -0.6  -0.6   1.  ]\n",
      " [ 0.36 -0.36  0.36 -0.6   0.6   1.  ]\n",
      " [ 0.36 -1.08  3.24 -0.6   1.8   1.  ]\n",
      " [ 0.36 -1.8   9.   -0.6   3.    1.  ]\n",
      " [ 0.36 -1.8   9.    0.6  -3.    1.  ]\n",
      " [ 0.36 -1.08  3.24  0.6  -1.8   1.  ]\n",
      " [ 0.36 -0.36  0.36  0.6  -0.6   1.  ]\n",
      " [ 0.36  0.36  0.36  0.6   0.6   1.  ]\n",
      " [ 0.36  1.08  3.24  0.6   1.8   1.  ]\n",
      " [ 0.36  1.8   9.    0.6   3.    1.  ]\n",
      " [ 3.24 -5.4   9.    1.8  -3.    1.  ]\n",
      " [ 3.24 -3.24  3.24  1.8  -1.8   1.  ]\n",
      " [ 3.24 -1.08  0.36  1.8  -0.6   1.  ]\n",
      " [ 3.24  1.08  0.36  1.8   0.6   1.  ]\n",
      " [ 3.24  3.24  3.24  1.8   1.8   1.  ]\n",
      " [ 3.24  5.4   9.    1.8   3.    1.  ]\n",
      " [ 9.   -9.    9.    3.   -3.    1.  ]\n",
      " [ 9.   -5.4   3.24  3.   -1.8   1.  ]\n",
      " [ 9.   -1.8   0.36  3.   -0.6   1.  ]\n",
      " [ 9.    1.8   0.36  3.    0.6   1.  ]\n",
      " [ 9.    5.4   3.24  3.    1.8   1.  ]\n",
      " [ 9.    9.    9.    3.    3.    1.  ]]\n"
     ]
    }
   ],
   "source": [
    "POLY_COUNT = 6                  # 2阶多项式一共6项\n",
    "def map_data(X1, X2):           # 生成两个变量的多元一阶矩阵\n",
    "    variables = np.ones((len(X1) * len(X2), POLY_COUNT))\n",
    "    y = np.zeros((len(X1) * len(X2)))\n",
    "    row_index = 0\n",
    "    for i in np.arange(len(X1)):\n",
    "        for j in np.arange(len(X2)):\n",
    "            row = variables[row_index]\n",
    "            row[0] = X1[i] * X1[i]\n",
    "            row[1] = X1[i] * X2[j]\n",
    "            row[2] = X2[j] * X2[j]\n",
    "            row[3] = X1[i]\n",
    "            row[4] = X2[j]\n",
    "            y[row_index] = f(X1[i], X2[j])\n",
    "            #row[5] = 1\n",
    "            row_index += 1\n",
    "    return (variables, y)\n",
    "\n",
    "x_ext, y = map_data(x1, x2)\n",
    "print(\"X维度矩阵：\")\n",
    "print(x_ext)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### **任务3：阅读理解通用的批量梯度下降算法函数**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def batch_gradient_descent(target_fn, gradient_fn, init_W, X, Y, learning_rate=0.001, tolerance=1e-7):\n",
    "    \"\"\"支持多变量的批量梯度下降法\"\"\"\n",
    "    # 假设函数为：y = wn * xn + w(n-1) * x(n-1) +... + w2 * x2 + w1 * x1 + w0 * x0 其中，x0为1\n",
    "    # X中：第一列为xn,第二列为x(n-1)，依次类推，最后一列为x0(全为1)\n",
    "    # W向量顺序是：wn,w(n-1),...w1,w0，要确保与X中各列顺序一致\n",
    "    W = init_W\n",
    "    target_value = target_fn(W, X, Y) \n",
    "    iter_count = 0\n",
    "    while iter_count < 50000:                      # 如果50000次循环仍未收敛，则认为无法收敛\n",
    "        gradient = gradient_fn(W, X, Y)\n",
    "        next_W = W - gradient * learning_rate \n",
    "        next_target_value = target_fn(next_W, X, Y)\n",
    "        if abs(target_value - next_target_value) < tolerance:\n",
    "            print(\"循环\", iter_count, \"次后收敛\")\n",
    "            return W\n",
    "        else:                                             \n",
    "            W, target_value = next_W, next_target_value\n",
    "            iter_count += 1\n",
    "    \n",
    "    print(\"50000次循环后，计算仍未收敛\")\n",
    "    return W"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### **任务4：编写目标函数、梯度函数，并调用批量梯度下降函数完成拟合**\n",
    "* 定位下面的代码框架中所有的\"### TODO\"，按照提示完成代码编写\n",
    " * 编写完成target_function\n",
    " * 编写完成gradient_function\n",
    " * 调整learning_rate的值使批量梯度下降计算较快较好的收敛\n",
    " * 编写完成batch_gradient_descent的函数调用\n",
    "* 运行代码，观察拟合出来的系数，应该与3, -5, 2, -8, 10, 6接近"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "### TODO:实现下列目标函数（按最小二乘法）\n",
    "def target_function(W, X, Y):                           # 修改return返回值\n",
    "    return 0.0\n",
    "\n",
    "### TODO:实现下列梯度函数（返回梯度数组）\n",
    "def gradient_function(W, X, Y):                         # 修改return返回值\n",
    "    return 0.0\n",
    "\n",
    "np.random.seed(0)\n",
    "init_W = np.random.randn(x_ext.shape[1])                # x_ext现在是具有6个列的矩阵，因此init_W需要6个初始元素\n",
    "\n",
    "### TODO:调整learning_rate使批量梯度下降计算较快较好的收敛\n",
    "learning_rate = 0.05                                    # 可调整learning_rate\n",
    "\n",
    "tolerance = 1e-7                                        # 可调整tolerance\n",
    "\n",
    "### TODO：调用batch_gradient_descent计算拟合系数，请填充其参数\n",
    "W = batch_gradient_descent(# 填充参数 #)\n",
    "\n",
    "print(\"W值\", W)                                         \n",
    "print(\"拟合曲线计算的数据值与真实值的差异：\", x_ext.dot(W) - y)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
